{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Building a MatGraphDB Example with MPNearHull Data\n", "\n", "In this notebook, we demonstrate how to build a materials graph database using the\n", "[MatGraphDB](https://github.com/your/matgraphdb) framework with the MPNearHull dataset.\n", "\n", "The steps include:\n", "1. Importing required libraries and setting up configuration paths.\n", "2. Downloading and extracting the dataset (and raw materials data if needed).\n", "3. Creating a MatGraphDB instance.\n", "4. Initializing node generators.\n", "5. Initializing edge generators.\n", "6. Verifying the database setup.\n", "\n", "Follow along and run each cell to see how the database is constructed." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Setup" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", "from pathlib import Path\n", "\n", "FILE_DIR = Path(\".\")\n", "DATA_DIR = FILE_DIR / \"data\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Define Function for Downloading and Extracting Data" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[INFO] 2025-05-11 10:21:41 - parquetdb.utils.config[37][load_config] - Config file: C:\\Users\\lllang\\AppData\\Local\\parquetdb\\parquetdb\\config.yml\n", "[INFO] 2025-05-11 10:21:41 - parquetdb.utils.config[41][load_config] - Setting data_dir to C:\\Users\\lllang\\Desktop\\Current_Projects\\MatGraphDB\\data\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "85ff371ca9b0486cb3f75eeea12ca534", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Fetching 4 files: 0%| | 0/4 [00:00 4\u001b[0m mpdb \u001b[38;5;241m=\u001b[39m 
\u001b[43mMPNearHull\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstorage_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mDB_PATH\u001b[49m\u001b[43m,\u001b[49m\u001b[43minitialize_from_scratch\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\n", "File \u001b[1;32m~\\Desktop\\Current_Projects\\MatGraphDB\\matgraphdb\\datasets\\mp_near_hull.py:45\u001b[0m, in \u001b[0;36mMPNearHull.__init__\u001b[1;34m(self, storage_path, download, from_scratch, initialize_from_scratch)\u001b[0m\n\u001b[0;32m 38\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDownloading dataset from \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrepo_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 39\u001b[0m snapshot_download(\n\u001b[0;32m 40\u001b[0m repo_id\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrepo_id,\n\u001b[0;32m 41\u001b[0m repo_type\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrepo_type,\n\u001b[0;32m 42\u001b[0m local_dir\u001b[38;5;241m=\u001b[39mstorage_path,\n\u001b[0;32m 43\u001b[0m )\n\u001b[1;32m---> 45\u001b[0m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__init__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mstorage_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstorage_path\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 47\u001b[0m n_edge_generators \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39medge_generator_store\u001b[38;5;241m.\u001b[39mgenerator_names)\n\u001b[0;32m 48\u001b[0m n_node_generators \u001b[38;5;241m=\u001b[39m 
\u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnode_generator_store\u001b[38;5;241m.\u001b[39mgenerator_names)\n", "File \u001b[1;32m~\\Desktop\\Current_Projects\\MatGraphDB\\matgraphdb\\core\\matgraphdb.py:39\u001b[0m, in \u001b[0;36mMatGraphDB.__init__\u001b[1;34m(self, storage_path, materials_store, load_custom_stores, **kwargs)\u001b[0m\n\u001b[0;32m 28\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 29\u001b[0m \u001b[38;5;124;03mParameters\u001b[39;00m\n\u001b[0;32m 30\u001b[0m \u001b[38;5;124;03m----------\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 36\u001b[0m \u001b[38;5;124;03m Whether to load custom stores.\u001b[39;00m\n\u001b[0;32m 37\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 38\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstorage_path \u001b[38;5;241m=\u001b[39m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mabspath(storage_path)\n\u001b[1;32m---> 39\u001b[0m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__init__\u001b[39m(\n\u001b[0;32m 40\u001b[0m storage_path\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstorage_path,\n\u001b[0;32m 41\u001b[0m load_custom_stores\u001b[38;5;241m=\u001b[39mload_custom_stores,\n\u001b[0;32m 42\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[0;32m 43\u001b[0m )\n\u001b[0;32m 44\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mInitializing MatGraphDB at: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstorage_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 46\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmaterials_path \u001b[38;5;241m=\u001b[39m 
os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnodes_path, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmaterial\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", "File \u001b[1;32mc:\\Users\\lllang\\miniconda3\\envs\\matgraphdb\\lib\\site-packages\\parquetdb\\graph\\parquet_graphdb.py:71\u001b[0m, in \u001b[0;36mParquetGraphDB.__init__\u001b[1;34m(self, storage_path, load_custom_stores, verbose)\u001b[0m\n\u001b[0;32m 68\u001b[0m logger\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mGraph directory: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgraph_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 70\u001b[0m \u001b[38;5;66;03m# Initialize empty dictionaries for stores, load existing stores\u001b[39;00m\n\u001b[1;32m---> 71\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnode_stores \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_load_existing_node_stores\u001b[49m\u001b[43m(\u001b[49m\u001b[43mload_custom_stores\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 72\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39medge_stores \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_load_existing_edge_stores(load_custom_stores)\n\u001b[0;32m 74\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39medge_generator_store \u001b[38;5;241m=\u001b[39m GeneratorStore(\n\u001b[0;32m 75\u001b[0m storage_path\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39medge_generators_path, verbose\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mverbose\n\u001b[0;32m 76\u001b[0m )\n", "File 
\u001b[1;32mc:\\Users\\lllang\\miniconda3\\envs\\matgraphdb\\lib\\site-packages\\parquetdb\\graph\\parquet_graphdb.py:182\u001b[0m, in \u001b[0;36mParquetGraphDB._load_existing_node_stores\u001b[1;34m(self, load_custom_stores)\u001b[0m\n\u001b[0;32m 180\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21m_load_existing_node_stores\u001b[39m(\u001b[38;5;28mself\u001b[39m, load_custom_stores: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m):\n\u001b[0;32m 181\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mLoading existing node stores\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m--> 182\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_load_existing_stores\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 183\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnodes_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 184\u001b[0m \u001b[43m \u001b[49m\u001b[43mdefault_store_class\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mNodeStore\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 185\u001b[0m \u001b[43m \u001b[49m\u001b[43mload_custom_stores\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mload_custom_stores\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 186\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[1;32mc:\\Users\\lllang\\miniconda3\\envs\\matgraphdb\\lib\\site-packages\\parquetdb\\graph\\parquet_graphdb.py:216\u001b[0m, in \u001b[0;36mParquetGraphDB._load_existing_stores\u001b[1;34m(self, stores_path, default_store_class, load_custom_stores)\u001b[0m\n\u001b[0;32m 214\u001b[0m store_path \u001b[38;5;241m=\u001b[39m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(stores_path, store_type)\n\u001b[0;32m 215\u001b[0m 
\u001b[38;5;28;01mif\u001b[39;00m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39misdir(store_path):\n\u001b[1;32m--> 216\u001b[0m store_dict[store_type] \u001b[38;5;241m=\u001b[39m \u001b[43mload_store\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 217\u001b[0m \u001b[43m \u001b[49m\u001b[43mstore_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdefault_store_class\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mverbose\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mverbose\u001b[49m\n\u001b[0;32m 218\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 219\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 220\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 221\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mStore path \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mstore_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m is not a directory. 
Likely does not exist.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 222\u001b[0m )\n", "File \u001b[1;32mc:\\Users\\lllang\\miniconda3\\envs\\matgraphdb\\lib\\site-packages\\parquetdb\\graph\\parquet_graphdb.py:950\u001b[0m, in \u001b[0;36mload_store\u001b[1;34m(store_path, default_store_class, verbose)\u001b[0m\n\u001b[0;32m 948\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m class_module \u001b[38;5;129;01mand\u001b[39;00m class_name \u001b[38;5;129;01mand\u001b[39;00m default_store_class \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 949\u001b[0m logger\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mImporting class from module: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mclass_module\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m--> 950\u001b[0m module \u001b[38;5;241m=\u001b[39m \u001b[43mimportlib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mimport_module\u001b[49m\u001b[43m(\u001b[49m\u001b[43mclass_module\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 951\u001b[0m class_obj \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(module, class_name)\n\u001b[0;32m 952\u001b[0m store \u001b[38;5;241m=\u001b[39m class_obj(storage_path\u001b[38;5;241m=\u001b[39mstore_path)\n", "File \u001b[1;32mc:\\Users\\lllang\\miniconda3\\envs\\matgraphdb\\lib\\importlib\\__init__.py:126\u001b[0m, in \u001b[0;36mimport_module\u001b[1;34m(name, package)\u001b[0m\n\u001b[0;32m 124\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n\u001b[0;32m 125\u001b[0m level \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m--> 126\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[43m_bootstrap\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_gcd_import\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m[\u001b[49m\u001b[43mlevel\u001b[49m\u001b[43m:\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpackage\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlevel\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[1;32m:1050\u001b[0m, in \u001b[0;36m_gcd_import\u001b[1;34m(name, package, level)\u001b[0m\n", "File \u001b[1;32m:1027\u001b[0m, in \u001b[0;36m_find_and_load\u001b[1;34m(name, import_)\u001b[0m\n", "File \u001b[1;32m:992\u001b[0m, in \u001b[0;36m_find_and_load_unlocked\u001b[1;34m(name, import_)\u001b[0m\n", "File \u001b[1;32m:241\u001b[0m, in \u001b[0;36m_call_with_frames_removed\u001b[1;34m(f, *args, **kwds)\u001b[0m\n", "File \u001b[1;32m:1050\u001b[0m, in \u001b[0;36m_gcd_import\u001b[1;34m(name, package, level)\u001b[0m\n", "File \u001b[1;32m:1027\u001b[0m, in \u001b[0;36m_find_and_load\u001b[1;34m(name, import_)\u001b[0m\n", "File \u001b[1;32m:992\u001b[0m, in \u001b[0;36m_find_and_load_unlocked\u001b[1;34m(name, import_)\u001b[0m\n", "File \u001b[1;32m:241\u001b[0m, in \u001b[0;36m_call_with_frames_removed\u001b[1;34m(f, *args, **kwds)\u001b[0m\n", "File \u001b[1;32m:1050\u001b[0m, in \u001b[0;36m_gcd_import\u001b[1;34m(name, package, level)\u001b[0m\n", "File \u001b[1;32m:1027\u001b[0m, in \u001b[0;36m_find_and_load\u001b[1;34m(name, import_)\u001b[0m\n", "File \u001b[1;32m:1004\u001b[0m, in \u001b[0;36m_find_and_load_unlocked\u001b[1;34m(name, import_)\u001b[0m\n", "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'matgraphdb.materials'" ] } ], "source": [ "from matgraphdb.datasets import MPNearHull\n", "\n", "DB_PATH = DATA_DIR / \"MPNearHull\"\n", "mpdb = MPNearHull(storage_path=DB_PATH,initialize_from_scratch=False)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Initialization" ] }, { "cell_type": "markdown", "metadata": {}, 
"source": [ "### Initialize a Materials Store" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "============================================================\n", "NODE STORE SUMMARY\n", "============================================================\n", "Node type: material\n", "• Number of nodes: 80643\n", "• Number of features: 136\n", "Storage path: ..\\..\\data\\examples\\01\\material\n", "\n", "\n", "############################################################\n", "METADATA\n", "############################################################\n", "• class: MaterialStore\n", "• class_module: matgraphdb.materials.nodes.materials\n", "• node_type: material\n", "• name_column: id\n", "\n", "############################################################\n", "NODE DETAILS\n", "############################################################\n", "\n" ] } ], "source": [ "from matgraphdb import MaterialStore\n", "\n", "materials_store = MaterialStore(storage_path=MATERIALS_PATH)\n", "print(materials_store)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Initialize a MatGraphDB Instance" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "============================================================\n", "GRAPH DATABASE SUMMARY\n", "============================================================\n", "Name: MatGraphDB\n", "Storage path: ..\\..\\data\\examples\\01\\MatGraphDB\n", "└── Repository structure:\n", " ├── nodes/ (..\\..\\data\\examples\\01\\MatGraphDB\\nodes)\n", " ├── edges/ (..\\..\\data\\examples\\01\\MatGraphDB\\edges)\n", " ├── edge_generators/ (..\\..\\data\\examples\\01\\MatGraphDB\\edge_generators)\n", " ├── node_generators/ (..\\..\\data\\examples\\01\\MatGraphDB\\node_generators)\n", " └── graph/ (..\\..\\data\\examples\\01\\MatGraphDB\\graph)\n", "\n", 
"############################################################\n", "NODE DETAILS\n", "############################################################\n", "Total node types: 1\n", "------------------------------------------------------------\n", "• Node type: material\n", " - Number of nodes: 80643\n", " - Number of features: 136\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\material\n", "------------------------------------------------------------\n", "\n", "############################################################\n", "EDGE DETAILS\n", "############################################################\n", "Total edge types: 0\n", "------------------------------------------------------------\n", "\n", "############################################################\n", "NODE GENERATOR DETAILS\n", "############################################################\n", "Total node generators: 0\n", "------------------------------------------------------------\n", "\n", "############################################################\n", "EDGE GENERATOR DETAILS\n", "############################################################\n", "Total edge generators: 0\n", "------------------------------------------------------------\n", "\n" ] } ], "source": [ "import shutil\n", "\n", "from matgraphdb import MatGraphDB\n", "\n", "# Start from a clean slate: delete any database directory left over from a\n", "# previous run. Guarding on existence avoids the FileNotFoundError that\n", "# shutil.rmtree raises for a missing path.\n", "if os.path.exists(MATGRAPHDB_PATH):\n", "    shutil.rmtree(MATGRAPHDB_PATH)\n", "mdb = MatGraphDB(storage_path=MATGRAPHDB_PATH, materials_store=materials_store)\n", "\n", "print(mdb)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Adding Nodes\n", "\n", "In this section, we will add the nodes to the MatGraphDB instance. We will be using some of the built-in node generators to add the nodes to the MatGraphDB instance." ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "from matgraphdb import generators\n", "\n", "# Here we define the generator functions and arguments if they are needed. 
\n", "# The first group of generators needs no arguments; the site and lattice\n", "# generators read from the material node store, so it is passed to them.\n", "node_generators = [\n", "    {\"generator_func\": func}\n", "    for func in (\n", "        generators.element,\n", "        generators.chemenv,\n", "        generators.crystal_system,\n", "        generators.magnetic_state,\n", "        generators.oxidation_state,\n", "        generators.space_group,\n", "        generators.wyckoff,\n", "    )\n", "]\n", "node_generators += [\n", "    {\n", "        \"generator_func\": func,\n", "        \"generator_args\": {\"material_store\": mdb.node_stores[\"material\"]},\n", "    }\n", "    for func in (generators.material_site, generators.material_lattice)\n", "]\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now we can add the node generators to the MatGraphDB instance. When we add the generator, it will immediately execute and add the nodes to the database." 
] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Adding node generator: element\n", "Adding node generator: chemenv\n", "Adding node generator: crystal_system\n", "Adding node generator: magnetic_state\n", "Adding node generator: oxidation_state\n", "Adding node generator: space_group\n", "Adding node generator: wyckoff\n", "Adding node generator: material_site\n", "Adding node generator: material_lattice\n", "Node generators have been initialized.\n", "============================================================\n", "GRAPH DATABASE SUMMARY\n", "============================================================\n", "Name: MatGraphDB\n", "Storage path: ..\\..\\data\\examples\\01\\MatGraphDB\n", "└── Repository structure:\n", " ├── nodes/ (..\\..\\data\\examples\\01\\MatGraphDB\\nodes)\n", " ├── edges/ (..\\..\\data\\examples\\01\\MatGraphDB\\edges)\n", " ├── edge_generators/ (..\\..\\data\\examples\\01\\MatGraphDB\\edge_generators)\n", " ├── node_generators/ (..\\..\\data\\examples\\01\\MatGraphDB\\node_generators)\n", " └── graph/ (..\\..\\data\\examples\\01\\MatGraphDB\\graph)\n", "\n", "############################################################\n", "NODE DETAILS\n", "############################################################\n", "Total node types: 10\n", "------------------------------------------------------------\n", "• Node type: material\n", " - Number of nodes: 80643\n", " - Number of features: 136\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\material\n", "------------------------------------------------------------\n", "• Node type: element\n", " - Number of nodes: 118\n", " - Number of features: 99\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\element\n", "------------------------------------------------------------\n", "• Node type: chemenv\n", " - Number of nodes: 67\n", " - Number of features: 15\n", " - db_path: 
..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\chemenv\n", "------------------------------------------------------------\n", "• Node type: crystal_system\n", " - Number of nodes: 7\n", " - Number of features: 2\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\crystal_system\n", "------------------------------------------------------------\n", "• Node type: magnetic_state\n", " - Number of nodes: 5\n", " - Number of features: 2\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\magnetic_state\n", "------------------------------------------------------------\n", "• Node type: oxidation_state\n", " - Number of nodes: 19\n", " - Number of features: 3\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\oxidation_state\n", "------------------------------------------------------------\n", "• Node type: space_group\n", " - Number of nodes: 230\n", " - Number of features: 2\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\space_group\n", "------------------------------------------------------------\n", "• Node type: wyckoff\n", " - Number of nodes: 1380\n", " - Number of features: 2\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\wyckoff\n", "------------------------------------------------------------\n", "• Node type: material_site\n", " - Number of nodes: 2545026\n", " - Number of features: 15\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\material_site\n", "------------------------------------------------------------\n", "• Node type: material_lattice\n", " - Number of nodes: 80643\n", " - Number of features: 12\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\material_lattice\n", "------------------------------------------------------------\n", "\n", "############################################################\n", "EDGE DETAILS\n", "############################################################\n", "Total edge types: 0\n", 
"------------------------------------------------------------\n", "\n", "############################################################\n", "NODE GENERATOR DETAILS\n", "############################################################\n", "Total node generators: 9\n", "------------------------------------------------------------\n", "• Generator: element\n", "Generator Args:\n", " - generator_func: []\n", " - generator_kwargs.base_file: ['C:\\\\Users\\\\lllang\\\\Desktop\\\\Current_Projects\\\\MatGraphDB\\\\matgraphdb\\\\utils\\\\chem_utils\\\\resources\\\\imputed_periodic_table_values.parquet']\n", " - generator_name: ['element']\n", " - id: [0]\n", "Generator Kwargs:\n", " - base_file: ['C:\\\\Users\\\\lllang\\\\Desktop\\\\Current_Projects\\\\MatGraphDB\\\\matgraphdb\\\\utils\\\\chem_utils\\\\resources\\\\imputed_periodic_table_values.parquet']\n", "------------------------------------------------------------\n", "• Generator: chemenv\n", "Generator Args:\n", " - generator_func: []\n", " - generator_kwargs.base_file: ['C:\\\\Users\\\\lllang\\\\Desktop\\\\Current_Projects\\\\MatGraphDB\\\\matgraphdb\\\\utils\\\\chem_utils\\\\resources\\\\coordination_geometries.parquet']\n", " - generator_name: ['chemenv']\n", " - id: [1]\n", "Generator Kwargs:\n", " - base_file: ['C:\\\\Users\\\\lllang\\\\Desktop\\\\Current_Projects\\\\MatGraphDB\\\\matgraphdb\\\\utils\\\\chem_utils\\\\resources\\\\coordination_geometries.parquet']\n", "------------------------------------------------------------\n", "• Generator: crystal_system\n", "Generator Args:\n", " - generator_func: []\n", " - generator_name: ['crystal_system']\n", " - id: [2]\n", "Generator Kwargs:\n", "------------------------------------------------------------\n", "• Generator: magnetic_state\n", "Generator Args:\n", " - generator_func: []\n", " - generator_name: ['magnetic_state']\n", " - id: [3]\n", "Generator Kwargs:\n", "------------------------------------------------------------\n", "• Generator: oxidation_state\n", 
"Generator Args:\n", " - generator_func: []\n", " - generator_name: ['oxidation_state']\n", " - id: [4]\n", "Generator Kwargs:\n", "------------------------------------------------------------\n", "• Generator: space_group\n", "Generator Args:\n", " - generator_func: []\n", " - generator_name: ['space_group']\n", " - id: [5]\n", "Generator Kwargs:\n", "------------------------------------------------------------\n", "• Generator: wyckoff\n", "Generator Args:\n", " - generator_func: []\n", " - generator_name: ['wyckoff']\n", " - id: [6]\n", "Generator Kwargs:\n", "------------------------------------------------------------\n", "• Generator: material_site\n", "Generator Args:\n", " - material_store: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\material\n", " - generator_func: []\n", " - generator_name: ['material_site']\n", " - id: [7]\n", "Generator Kwargs:\n", "------------------------------------------------------------\n", "• Generator: material_lattice\n", "Generator Args:\n", " - material_store: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\material\n", " - generator_func: []\n", " - generator_name: ['material_lattice']\n", " - id: [8]\n", "Generator Kwargs:\n", "------------------------------------------------------------\n", "\n", "############################################################\n", "EDGE GENERATOR DETAILS\n", "############################################################\n", "Total edge generators: 0\n", "------------------------------------------------------------\n", "\n" ] } ], "source": [ "# Add each node generator to the database.\n", "for generator in node_generators:\n", " generator_func = generator.get(\"generator_func\")\n", " generator_args = generator.get(\"generator_args\", None)\n", " print(f\"Adding node generator: {generator_func.__name__}\")\n", " mdb.add_node_generator(generator_func=generator_func, generator_args=generator_args)\n", "\n", "print(\"Node generators have been initialized.\")\n", "\n", "print(mdb)\n" ] }, { 
"cell_type": "markdown", "metadata": {}, "source": [ "## Adding Edges\n", "\n", "In this section, we will add the edges to the MatGraphDB instance. We will be using some of the built-in edge generators to add the edges to the MatGraphDB instance." ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Adding edge generator: element_element_neighborsByGroupPeriod\n", "Adding edge generator: element_oxiState_canOccur\n", "Adding edge generator: material_chemenv_containsSite\n", "Adding edge generator: material_crystalSystem_has\n", "Adding edge generator: material_element_has\n", "Adding edge generator: material_lattice_has\n", "Adding edge generator: material_spg_has\n", "Adding edge generator: element_chemenv_canOccur\n", "Adding edge generator: spg_crystalSystem_isApart\n", "Adding edge generator: element_element_bonds\n", "Edge generators have been initialized.\n", "============================================================\n", "GRAPH DATABASE SUMMARY\n", "============================================================\n", "Name: MatGraphDB\n", "Storage path: ..\\..\\data\\examples\\01\\MatGraphDB\n", "└── Repository structure:\n", " ├── nodes/ (..\\..\\data\\examples\\01\\MatGraphDB\\nodes)\n", " ├── edges/ (..\\..\\data\\examples\\01\\MatGraphDB\\edges)\n", " ├── edge_generators/ (..\\..\\data\\examples\\01\\MatGraphDB\\edge_generators)\n", " ├── node_generators/ (..\\..\\data\\examples\\01\\MatGraphDB\\node_generators)\n", " └── graph/ (..\\..\\data\\examples\\01\\MatGraphDB\\graph)\n", "\n", "############################################################\n", "NODE DETAILS\n", "############################################################\n", "Total node types: 10\n", "------------------------------------------------------------\n", "• Node type: material\n", " - Number of nodes: 80643\n", " - Number of features: 136\n", " - db_path: 
..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\material\n", "------------------------------------------------------------\n", "• Node type: element\n", " - Number of nodes: 118\n", " - Number of features: 99\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\element\n", "------------------------------------------------------------\n", "• Node type: chemenv\n", " - Number of nodes: 67\n", " - Number of features: 15\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\chemenv\n", "------------------------------------------------------------\n", "• Node type: crystal_system\n", " - Number of nodes: 7\n", " - Number of features: 2\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\crystal_system\n", "------------------------------------------------------------\n", "• Node type: magnetic_state\n", " - Number of nodes: 5\n", " - Number of features: 2\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\magnetic_state\n", "------------------------------------------------------------\n", "• Node type: oxidation_state\n", " - Number of nodes: 19\n", " - Number of features: 3\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\oxidation_state\n", "------------------------------------------------------------\n", "• Node type: space_group\n", " - Number of nodes: 230\n", " - Number of features: 2\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\space_group\n", "------------------------------------------------------------\n", "• Node type: wyckoff\n", " - Number of nodes: 1380\n", " - Number of features: 2\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\wyckoff\n", "------------------------------------------------------------\n", "• Node type: material_site\n", " - Number of nodes: 2545026\n", " - Number of features: 15\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\material_site\n", "------------------------------------------------------------\n", "• Node type: 
material_lattice\n", " - Number of nodes: 80643\n", " - Number of features: 12\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\material_lattice\n", "------------------------------------------------------------\n", "\n", "############################################################\n", "EDGE DETAILS\n", "############################################################\n", "Total edge types: 10\n", "------------------------------------------------------------\n", "• Edge type: element_element_neighborsByGroupPeriod\n", " - Number of edges: 391\n", " - Number of features: 14\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\edges\\element_element_neighborsByGroupPeriod\n", "------------------------------------------------------------\n", "• Edge type: element_oxiState_canOccur\n", " - Number of edges: 162\n", " - Number of features: 8\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\edges\\element_oxiState_canOccur\n", "------------------------------------------------------------\n", "• Edge type: material_chemenv_containsSite\n", " - Number of edges: 2542897\n", " - Number of features: 8\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\edges\\material_chemenv_containsSite\n", "------------------------------------------------------------\n", "• Edge type: material_crystalSystem_has\n", " - Number of edges: 80643\n", " - Number of features: 10\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\edges\\material_crystalSystem_has\n", "------------------------------------------------------------\n", "• Edge type: material_element_has\n", " - Number of edges: 270902\n", " - Number of features: 8\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\edges\\material_element_has\n", "------------------------------------------------------------\n", "• Edge type: material_lattice_has\n", " - Number of edges: 80643\n", " - Number of features: 8\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\edges\\material_lattice_has\n", 
"------------------------------------------------------------\n", "• Edge type: material_spg_has\n", " - Number of edges: 80643\n", " - Number of features: 10\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\edges\\material_spg_has\n", "------------------------------------------------------------\n", "• Edge type: element_chemenv_canOccur\n", " - Number of edges: 270474\n", " - Number of features: 7\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\edges\\element_chemenv_canOccur\n", "------------------------------------------------------------\n", "• Edge type: spg_crystalSystem_isApart\n", " - Number of edges: 230\n", " - Number of features: 7\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\edges\\spg_crystalSystem_isApart\n", "------------------------------------------------------------\n", "• Edge type: element_element_bonds\n", " - Number of edges: 3069943\n", " - Number of features: 7\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\edges\\element_element_bonds\n", "------------------------------------------------------------\n", "\n", "############################################################\n", "NODE GENERATOR DETAILS\n", "############################################################\n", "Total node generators: 9\n", "------------------------------------------------------------\n", "• Generator: element\n", "Generator Args:\n", " - generator_func: []\n", " - generator_kwargs.base_file: ['C:\\\\Users\\\\lllang\\\\Desktop\\\\Current_Projects\\\\MatGraphDB\\\\matgraphdb\\\\utils\\\\chem_utils\\\\resources\\\\imputed_periodic_table_values.parquet']\n", " - generator_name: ['element']\n", " - id: [0]\n", "Generator Kwargs:\n", " - base_file: ['C:\\\\Users\\\\lllang\\\\Desktop\\\\Current_Projects\\\\MatGraphDB\\\\matgraphdb\\\\utils\\\\chem_utils\\\\resources\\\\imputed_periodic_table_values.parquet']\n", "------------------------------------------------------------\n", "• Generator: chemenv\n", "Generator Args:\n", " - 
generator_func: []\n", " - generator_kwargs.base_file: ['C:\\\\Users\\\\lllang\\\\Desktop\\\\Current_Projects\\\\MatGraphDB\\\\matgraphdb\\\\utils\\\\chem_utils\\\\resources\\\\coordination_geometries.parquet']\n", " - generator_name: ['chemenv']\n", " - id: [1]\n", "Generator Kwargs:\n", " - base_file: ['C:\\\\Users\\\\lllang\\\\Desktop\\\\Current_Projects\\\\MatGraphDB\\\\matgraphdb\\\\utils\\\\chem_utils\\\\resources\\\\coordination_geometries.parquet']\n", "------------------------------------------------------------\n", "• Generator: crystal_system\n", "Generator Args:\n", " - generator_func: []\n", " - generator_name: ['crystal_system']\n", " - id: [2]\n", "Generator Kwargs:\n", "------------------------------------------------------------\n", "• Generator: magnetic_state\n", "Generator Args:\n", " - generator_func: []\n", " - generator_name: ['magnetic_state']\n", " - id: [3]\n", "Generator Kwargs:\n", "------------------------------------------------------------\n", "• Generator: oxidation_state\n", "Generator Args:\n", " - generator_func: []\n", " - generator_name: ['oxidation_state']\n", " - id: [4]\n", "Generator Kwargs:\n", "------------------------------------------------------------\n", "• Generator: space_group\n", "Generator Args:\n", " - generator_func: []\n", " - generator_name: ['space_group']\n", " - id: [5]\n", "Generator Kwargs:\n", "------------------------------------------------------------\n", "• Generator: wyckoff\n", "Generator Args:\n", " - generator_func: []\n", " - generator_name: ['wyckoff']\n", " - id: [6]\n", "Generator Kwargs:\n", "------------------------------------------------------------\n", "• Generator: material_site\n", "Generator Args:\n", " - material_store: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\material\n", " - generator_func: []\n", " - generator_name: ['material_site']\n", " - id: [7]\n", "Generator Kwargs:\n", "------------------------------------------------------------\n", "• Generator: material_lattice\n", 
"Generator Args:\n", " - material_store: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\material\n", " - generator_func: []\n", " - generator_name: ['material_lattice']\n", " - id: [8]\n", "Generator Kwargs:\n", "------------------------------------------------------------\n", "\n", "############################################################\n", "EDGE GENERATOR DETAILS\n", "############################################################\n", "Total edge generators: 10\n", "------------------------------------------------------------\n", "• Generator: element_oxiState_canOccur\n", "Generator Args:\n", " - element_store: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\element\n", " - oxiState_store: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\oxidation_state\n", "Generator Kwargs:\n", "------------------------------------------------------------\n", "• Generator: material_chemenv_containsSite\n", "Generator Args:\n", " - chemenv_store: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\chemenv\n", " - material_store: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\material\n", "Generator Kwargs:\n", "------------------------------------------------------------\n", "• Generator: material_crystalSystem_has\n", "Generator Args:\n", " - crystal_system_store: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\crystal_system\n", " - material_store: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\material\n", "Generator Kwargs:\n", "------------------------------------------------------------\n", "• Generator: element_element_neighborsByGroupPeriod\n", "Generator Args:\n", " - element_store: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\element\n", "Generator Kwargs:\n", "------------------------------------------------------------\n", "• Generator: material_element_has\n", "Generator Args:\n", " - element_store: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\element\n", " - material_store: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\material\n", "Generator Kwargs:\n", 
"------------------------------------------------------------\n", "• Generator: material_lattice_has\n", "Generator Args:\n", " - lattice_store: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\material_lattice\n", " - material_store: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\material\n", "Generator Kwargs:\n", "------------------------------------------------------------\n", "• Generator: material_spg_has\n", "Generator Args:\n", " - material_store: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\material\n", " - spg_store: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\space_group\n", "Generator Kwargs:\n", "------------------------------------------------------------\n", "• Generator: element_chemenv_canOccur\n", "Generator Args:\n", " - chemenv_store: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\chemenv\n", " - element_store: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\element\n", " - material_store: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\material\n", "Generator Kwargs:\n", "------------------------------------------------------------\n", "• Generator: spg_crystalSystem_isApart\n", "Generator Args:\n", " - crystal_system_store: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\crystal_system\n", " - spg_store: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\space_group\n", "Generator Kwargs:\n", "------------------------------------------------------------\n", "• Generator: element_element_bonds\n", "Generator Args:\n", " - element_store: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\element\n", " - material_store: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\material\n", "Generator Kwargs:\n", "------------------------------------------------------------\n", "\n" ] } ], "source": [ "\n", "# List of edge generator configurations.\n", "edge_generators = [\n", " {\n", " \"generator_func\": generators.element_element_neighborsByGroupPeriod,\n", " \"generator_args\": {\"element_store\": mdb.node_stores[\"element\"]},\n", " },\n", " {\n", " 
\"generator_func\": generators.element_oxiState_canOccur,\n", " \"generator_args\": {\n", " \"element_store\": mdb.node_stores[\"element\"],\n", " \"oxiState_store\": mdb.node_stores[\"oxidation_state\"],\n", " },\n", " },\n", " {\n", " \"generator_func\": generators.material_chemenv_containsSite,\n", " \"generator_args\": {\n", " \"material_store\": mdb.node_stores[\"material\"],\n", " \"chemenv_store\": mdb.node_stores[\"chemenv\"],\n", " },\n", " },\n", " {\n", " \"generator_func\": generators.material_crystalSystem_has,\n", " \"generator_args\": {\n", " \"material_store\": mdb.node_stores[\"material\"],\n", " \"crystal_system_store\": mdb.node_stores[\"crystal_system\"],\n", " },\n", " },\n", " {\n", " \"generator_func\": generators.material_element_has,\n", " \"generator_args\": {\n", " \"material_store\": mdb.node_stores[\"material\"],\n", " \"element_store\": mdb.node_stores[\"element\"],\n", " },\n", " },\n", " {\n", " \"generator_func\": generators.material_lattice_has,\n", " \"generator_args\": {\n", " \"material_store\": mdb.node_stores[\"material\"],\n", " \"lattice_store\": mdb.node_stores[\"material_lattice\"],\n", " },\n", " },\n", " {\n", " \"generator_func\": generators.material_spg_has,\n", " \"generator_args\": {\n", " \"material_store\": mdb.node_stores[\"material\"],\n", " \"spg_store\": mdb.node_stores[\"space_group\"],\n", " },\n", " },\n", " {\n", " \"generator_func\": generators.element_chemenv_canOccur,\n", " \"generator_args\": {\n", " \"element_store\": mdb.node_stores[\"element\"],\n", " \"chemenv_store\": mdb.node_stores[\"chemenv\"],\n", " \"material_store\": mdb.node_stores[\"material\"],\n", " },\n", " },\n", " {\n", " \"generator_func\": generators.spg_crystalSystem_isApart,\n", " \"generator_args\": {\n", " \"spg_store\": mdb.node_stores[\"space_group\"],\n", " \"crystal_system_store\": mdb.node_stores[\"crystal_system\"],\n", " },\n", " },\n", " {\n", " \"generator_func\": generators.element_element_bonds,\n", " 
\"generator_args\": {\n", " \"element_store\": mdb.node_stores[\"element\"],\n", " \"material_store\": mdb.node_stores[\"material\"],\n", " },\n", " },\n", "]\n", "\n", "\n", "# Add each edge generator to the database and run them immediately.\n", "for generator in edge_generators:\n", " generator_func = generator.get(\"generator_func\")\n", " generator_args = generator.get(\"generator_args\", None)\n", " print(f\"Adding edge generator: {generator_func.__name__}\")\n", " mdb.add_edge_generator(generator_func=generator_func, generator_args=generator_args, run_immediately=True)\n", "\n", "print(\"Edge generators have been initialized.\")\n", "print(mdb)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Verifying the Database\n" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "============================================================\n", "GRAPH DATABASE SUMMARY\n", "============================================================\n", "Name: MatGraphDB\n", "Storage path: ..\\..\\data\\examples\\01\\MatGraphDB\n", "└── Repository structure:\n", " ├── nodes/ (..\\..\\data\\examples\\01\\MatGraphDB\\nodes)\n", " ├── edges/ (..\\..\\data\\examples\\01\\MatGraphDB\\edges)\n", " ├── edge_generators/ (..\\..\\data\\examples\\01\\MatGraphDB\\edge_generators)\n", " ├── node_generators/ (..\\..\\data\\examples\\01\\MatGraphDB\\node_generators)\n", " └── graph/ (..\\..\\data\\examples\\01\\MatGraphDB\\graph)\n", "\n", "############################################################\n", "NODE DETAILS\n", "############################################################\n", "Total node types: 10\n", "------------------------------------------------------------\n", "• Node type: material\n", " - Number of nodes: 80643\n", " - Number of features: 136\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\material\n", "------------------------------------------------------------\n", 
"• Node type: element\n", " - Number of nodes: 118\n", " - Number of features: 99\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\element\n", "------------------------------------------------------------\n", "• Node type: chemenv\n", " - Number of nodes: 67\n", " - Number of features: 15\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\chemenv\n", "------------------------------------------------------------\n", "• Node type: crystal_system\n", " - Number of nodes: 7\n", " - Number of features: 2\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\crystal_system\n", "------------------------------------------------------------\n", "• Node type: magnetic_state\n", " - Number of nodes: 5\n", " - Number of features: 2\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\magnetic_state\n", "------------------------------------------------------------\n", "• Node type: oxidation_state\n", " - Number of nodes: 19\n", " - Number of features: 3\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\oxidation_state\n", "------------------------------------------------------------\n", "• Node type: space_group\n", " - Number of nodes: 230\n", " - Number of features: 2\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\space_group\n", "------------------------------------------------------------\n", "• Node type: wyckoff\n", " - Number of nodes: 1380\n", " - Number of features: 2\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\wyckoff\n", "------------------------------------------------------------\n", "• Node type: material_site\n", " - Number of nodes: 2545026\n", " - Number of features: 15\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\material_site\n", "------------------------------------------------------------\n", "• Node type: material_lattice\n", " - Number of nodes: 80643\n", " - Number of features: 12\n", " - db_path: 
..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\material_lattice\n", "------------------------------------------------------------\n", "\n", "############################################################\n", "EDGE DETAILS\n", "############################################################\n", "Total edge types: 10\n", "------------------------------------------------------------\n", "• Edge type: element_element_neighborsByGroupPeriod\n", " - Number of edges: 391\n", " - Number of features: 14\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\edges\\element_element_neighborsByGroupPeriod\n", "------------------------------------------------------------\n", "• Edge type: element_oxiState_canOccur\n", " - Number of edges: 162\n", " - Number of features: 8\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\edges\\element_oxiState_canOccur\n", "------------------------------------------------------------\n", "• Edge type: material_chemenv_containsSite\n", " - Number of edges: 2542897\n", " - Number of features: 8\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\edges\\material_chemenv_containsSite\n", "------------------------------------------------------------\n", "• Edge type: material_crystalSystem_has\n", " - Number of edges: 80643\n", " - Number of features: 10\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\edges\\material_crystalSystem_has\n", "------------------------------------------------------------\n", "• Edge type: material_element_has\n", " - Number of edges: 270902\n", " - Number of features: 8\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\edges\\material_element_has\n", "------------------------------------------------------------\n", "• Edge type: material_lattice_has\n", " - Number of edges: 80643\n", " - Number of features: 8\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\edges\\material_lattice_has\n", "------------------------------------------------------------\n", "• Edge type: material_spg_has\n", 
" - Number of edges: 80643\n", " - Number of features: 10\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\edges\\material_spg_has\n", "------------------------------------------------------------\n", "• Edge type: element_chemenv_canOccur\n", " - Number of edges: 270474\n", " - Number of features: 7\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\edges\\element_chemenv_canOccur\n", "------------------------------------------------------------\n", "• Edge type: spg_crystalSystem_isApart\n", " - Number of edges: 230\n", " - Number of features: 7\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\edges\\spg_crystalSystem_isApart\n", "------------------------------------------------------------\n", "• Edge type: element_element_bonds\n", " - Number of edges: 3069943\n", " - Number of features: 7\n", " - db_path: ..\\..\\data\\examples\\01\\MatGraphDB\\edges\\element_element_bonds\n", "------------------------------------------------------------\n", "\n", "############################################################\n", "NODE GENERATOR DETAILS\n", "############################################################\n", "Total node generators: 9\n", "------------------------------------------------------------\n", "• Generator: element\n", "Generator Args:\n", " - generator_func: []\n", " - generator_kwargs.base_file: ['C:\\\\Users\\\\lllang\\\\Desktop\\\\Current_Projects\\\\MatGraphDB\\\\matgraphdb\\\\utils\\\\chem_utils\\\\resources\\\\imputed_periodic_table_values.parquet']\n", " - generator_name: ['element']\n", " - id: [0]\n", "Generator Kwargs:\n", " - base_file: ['C:\\\\Users\\\\lllang\\\\Desktop\\\\Current_Projects\\\\MatGraphDB\\\\matgraphdb\\\\utils\\\\chem_utils\\\\resources\\\\imputed_periodic_table_values.parquet']\n", "------------------------------------------------------------\n", "• Generator: chemenv\n", "Generator Args:\n", " - generator_func: []\n", " - generator_kwargs.base_file: 
['C:\\\\Users\\\\lllang\\\\Desktop\\\\Current_Projects\\\\MatGraphDB\\\\matgraphdb\\\\utils\\\\chem_utils\\\\resources\\\\coordination_geometries.parquet']\n", " - generator_name: ['chemenv']\n", " - id: [1]\n", "Generator Kwargs:\n", " - base_file: ['C:\\\\Users\\\\lllang\\\\Desktop\\\\Current_Projects\\\\MatGraphDB\\\\matgraphdb\\\\utils\\\\chem_utils\\\\resources\\\\coordination_geometries.parquet']\n", "------------------------------------------------------------\n", "• Generator: crystal_system\n", "Generator Args:\n", " - generator_func: []\n", " - generator_name: ['crystal_system']\n", " - id: [2]\n", "Generator Kwargs:\n", "------------------------------------------------------------\n", "• Generator: magnetic_state\n", "Generator Args:\n", " - generator_func: []\n", " - generator_name: ['magnetic_state']\n", " - id: [3]\n", "Generator Kwargs:\n", "------------------------------------------------------------\n", "• Generator: oxidation_state\n", "Generator Args:\n", " - generator_func: []\n", " - generator_name: ['oxidation_state']\n", " - id: [4]\n", "Generator Kwargs:\n", "------------------------------------------------------------\n", "• Generator: space_group\n", "Generator Args:\n", " - generator_func: []\n", " - generator_name: ['space_group']\n", " - id: [5]\n", "Generator Kwargs:\n", "------------------------------------------------------------\n", "• Generator: wyckoff\n", "Generator Args:\n", " - generator_func: []\n", " - generator_name: ['wyckoff']\n", " - id: [6]\n", "Generator Kwargs:\n", "------------------------------------------------------------\n", "• Generator: material_site\n", "Generator Args:\n", " - material_store: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\material\n", " - generator_func: []\n", " - generator_name: ['material_site']\n", " - id: [7]\n", "Generator Kwargs:\n", "------------------------------------------------------------\n", "• Generator: material_lattice\n", "Generator Args:\n", " - material_store: 
..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\material\n", " - generator_func: []\n", " - generator_name: ['material_lattice']\n", " - id: [8]\n", "Generator Kwargs:\n", "------------------------------------------------------------\n", "\n", "############################################################\n", "EDGE GENERATOR DETAILS\n", "############################################################\n", "Total edge generators: 10\n", "------------------------------------------------------------\n", "• Generator: element_oxiState_canOccur\n", "Generator Args:\n", " - element_store: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\element\n", " - oxiState_store: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\oxidation_state\n", "Generator Kwargs:\n", "------------------------------------------------------------\n", "• Generator: material_chemenv_containsSite\n", "Generator Args:\n", " - chemenv_store: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\chemenv\n", " - material_store: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\material\n", "Generator Kwargs:\n", "------------------------------------------------------------\n", "• Generator: material_crystalSystem_has\n", "Generator Args:\n", " - crystal_system_store: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\crystal_system\n", " - material_store: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\material\n", "Generator Kwargs:\n", "------------------------------------------------------------\n", "• Generator: element_element_neighborsByGroupPeriod\n", "Generator Args:\n", " - element_store: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\element\n", "Generator Kwargs:\n", "------------------------------------------------------------\n", "• Generator: material_element_has\n", "Generator Args:\n", " - element_store: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\element\n", " - material_store: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\material\n", "Generator Kwargs:\n", 
"------------------------------------------------------------\n", "• Generator: material_lattice_has\n", "Generator Args:\n", " - lattice_store: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\material_lattice\n", " - material_store: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\material\n", "Generator Kwargs:\n", "------------------------------------------------------------\n", "• Generator: material_spg_has\n", "Generator Args:\n", " - material_store: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\material\n", " - spg_store: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\space_group\n", "Generator Kwargs:\n", "------------------------------------------------------------\n", "• Generator: element_chemenv_canOccur\n", "Generator Args:\n", " - chemenv_store: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\chemenv\n", " - element_store: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\element\n", " - material_store: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\material\n", "Generator Kwargs:\n", "------------------------------------------------------------\n", "• Generator: spg_crystalSystem_isApart\n", "Generator Args:\n", " - crystal_system_store: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\crystal_system\n", " - spg_store: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\space_group\n", "Generator Kwargs:\n", "------------------------------------------------------------\n", "• Generator: element_element_bonds\n", "Generator Args:\n", " - element_store: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\element\n", " - material_store: ..\\..\\data\\examples\\01\\MatGraphDB\\nodes\\material\n", "Generator Kwargs:\n", "------------------------------------------------------------\n", "\n" ] } ], "source": [ "print(mdb)" ] } ], "metadata": { "kernelspec": { "display_name": "matgraphdb", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", 
"nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.0" }, "nbsphinx": { "execute": "never" } }, "nbformat": 4, "nbformat_minor": 2 }